{"learning_rate_constant": 0.1, "multiproblem_per_task_threshold": "", "conv_first_kernel": 3, "eval_drop_long_sequences": false, "action_vocab_size": 5, "use_fixed_batch_size": false, "use_additional_loss": false, "split_targets_strided_training": false, "no_data_parallelism": false, "freeze_reference_model": true, "weights_fn": {}, "force_full_predict": false, "learning_rate_decay_steps": 5000, "optimizer_adam_epsilon": 1e-09, "shared_embedding_and_softmax_weights": true, "unidirectional_encoder": false, "multiproblem_max_input_length": -1, "moe_hidden_sizes": "2048", "multiproblem_target_eval_only": false, "proximity_bias": false, "layer_prepostprocess_dropout_broadcast_dims": "", "area_key_mode": "none", "span_aggregation": "sum", "moe_loss_coef": 0.001, "batch_shuffle_size": 512, "max_length": 256, "norm_epsilon": 1e-06, "memory_height": 1, "screen_encoder_layers": 6, "factored_logits": false, "split_targets_max_chunks": 100, "daisy_chain_variables": true, "optimizer_adafactor_multiply_by_parameter_scale": true, "scheduled_sampling_method": "parallel", "learning_rate_decay_staircase": false, "multiply_embedding_mode": "sqrt_depth", "use_custom_ops": true, "summarize_vars": false, "attention_dropout_broadcast_dims": "", "split_targets_chunk_length": 0, "use_target_space_embedding": false, "batch_size": 64, "weight_dtype": "float32", "filter_size": 512, "label_smoothing": 0.1, "multiproblem_fixed_train_length": -1, "clip_grad_norm": 0.0, "multiproblem_reweight_label_loss": false, "optimizer_multistep_accumulate_steps": 0, "moe_num_experts": 16, "sampling_keep_top_k": -1, "synthetic_screen_noise": 0.0, "mixed_precision_optimizer_loss_scaler": "exponential", "min_length": 0, "multiproblem_vocab_size": -1, "warm_start_from_second": "", "learning_rate_decay_scheme": "noam", "optimizer_adam_beta1": 0.9, "optimizer_adam_beta2": 0.997, "scheduled_sampling_warmup_steps": 50000, "grad_noise_scale": 0.0, "vocab_divisor": 1, "screen_encoder": "transformer", "pretrained_model_dir": "", "layer_preprocess_sequence": "n", "multiproblem_mixing_schedule": "constant", "optimizer_adafactor_beta1": 0.0, "mean_synthetic_length": 1.0, "num_joint_layers": 2, "parameter_attention_key_channels": 0, "causal_decoder_self_attention": true, "bottom": {}, "max_pixel_pos": 100, "heads_share_relative_embedding": false, "learning_rate_warmup_steps": 8000, "ffn_layer": "dense_relu_dense", "learning_rate": 0.2, "prepend_mode": "none", "multiproblem_max_target_length": -1, "eval_run_autoregressive": false, "learning_rate_decay_rate": 1.0, "multiproblem_label_weight": 0.5, "kernel_width": 1, "num_area_layers": 0, "attention_dropout": 0.4, "symbol_dropout": 0.0, "max_dom_pos": 500, "top": {}, "compress_steps": 0, "stddev_synthetic_length": 2.0, "learning_rate_minimum": null, "gpu_automatic_mixed_precision": false, "summarize_grads": false, "scheduled_sampling_prob": 0.0, "use_pad_remover": true, "reference_warmup_steps": 0, "scheduled_sampling_gold_mixin_prob": 0.5, "optimizer_adafactor_beta2": 0.999, "dis_loss_ratio": 0.01, "max_area_height": 1, "moe_k": 2, "task_vocab_size": 44462, "mixed_precision_optimizer_init_loss_scale": 32768, "weight_noise": 0.0, "initializer_gain": 1.0, "learning_rate_schedule": "constant*linear_warmup*rsqrt_decay", "layer_postprocess_sequence": "da", "min_length_bucket": 8, "attention_variables_3d": false, "split_to_length": 0, "obj_text_aggregation": "sum", "sampling_method": "argmax", "optimizer_momentum_momentum": 0.9, "load_screen": true, "pad_batch": false, "optimizer_adafactor_factored": true, "screen_embedding_feature": "text_pos_type_dom_click", "optimizer_zero_grads": false, "video_num_target_frames": 1, "activation_dtype": "float32", "max_target_seq_length": 0, "max_span": 13, "pack_dataset": false, "optimizer_adafactor_decay_type": "pow", "max_area_width": 1, "norm_type": "layer", "hard_attention_k": 0, "add_relative_to_values": false, "parameter_attention_value_channels": 0, "alignment": "dot_product_attention", "optimizer_momentum_nesterov": false, "multiproblem_schedule_max_examples": 10000000.0, "area_value_mode": "none", "num_hidden_layers": 6, "mlperf_mode": false, "relu_dropout": 0.3, "tpu_enable_host_call": false, "scheduled_sampling_warmup_schedule": "exp", "scheduled_sampling_num_passes": 1, "weight_decay": 0.0, "moe_overhead_train": 1.0, "initializer": "uniform_unit_scaling", "name": {}, "gan_update": "center", "compute_verb_obj_separately": true, "num_encoder_layers": 0, "optimizer": "adam", "num_decoder_layers": 0, "kernel_height": 3, "sampling_temp": 1.0, "self_attention_type": "dot_product", "loss": {}, "video_num_input_frames": 1, "dropout": 0.2, "max_relative_position": 0, "gumbel_noise_weight": 0.0, "optimizer_adafactor_memory_exponent": 0.8, "shared_embedding": false, "learning_rate_cosine_cycle_steps": 250000, "gen_loss_ratio": 0.01, "max_input_seq_length": 0, "layer_prepostprocess_dropout": 0.2, "moe_overhead_eval": 2.0, "overload_eval_metric_name": "", "attention_key_channels": 0, "optimizer_adafactor_clipping_threshold": 1.0, "hidden_size": 128, "relu_dropout_broadcast_dims": "", "length_bucket_step": 1.1, "multiproblem_schedule_threshold": 0.5, "symbol_modality_num_shards": 16, "attention_value_channels": 0, "num_heads": 8, "nbr_decoder_problems": 1, "pos": "timing"}
